I did this a bit flippantly before, but I want to formalize the process by which we estimate the uncertainty on emulator predictions.
The biggest problem is at small scales, so I'm going to look at those bins individually.
Fixes to try:
In [51]:
from pearce.emulator import SpicyBuffalo, LemonPepperWet, OriginalRecipe
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [52]:
# Plotting setup: inline matplotlib figures with seaborn's default styling.
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [53]:
#xi gg
# Training and test catalogs for the xi_gg emulator at fixed number density.
# NOTE(review): absolute /scratch paths — these only resolve on that cluster's
# filesystem; consider a configurable data directory.
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'
In [54]:
# Emulator configuration: Gaussian-process regression.
# 'split_method' presumably controls how training/test data are partitioned —
# TODO confirm against the pearce emulator documentation.
em_method = 'gp'
split_method = 'random'
In [55]:
# Scale factor a = 1.0 corresponds to redshift z = 0.
a = 1.0
z = 1.0/a - 1.0
In [56]:
# Fix the redshift and a single (small) r bin; r = 0.09581734 is presumably
# the center of the smallest separation bin — TODO confirm against the catalog.
# NOTE(review): bin_idx is defined but not used in the cells visible below.
bin_idx = 0
fixed_params = {'z':z, 'r': 0.09581734}#, 'cosmo': 0}#, 'r':24.06822623}
In [57]:
from glob import glob
In [58]:
# GP log-hyperparameters from a previous optimization run (25 values, matching
# the `pnames` list built below: a bias term, then amp + one entry per model
# parameter, twice). Several entries sit exactly at +/-12, presumably the
# optimizer's bounds — TODO confirm.
hp = np.array([ 8.22518016e+00, -8.48981351e+00, 8.71510289e+00, -4.00883505e+00,
-1.20000000e+01, 6.39814872e+00, 2.41769925e+00, 1.28070602e+00,
-3.23773108e-01, 8.24276778e+00, 1.20000000e+01, -7.20251694e+00,
-1.20000000e+01, -5.17385710e+00, -4.80026082e-01, -8.76781990e-01,
-3.99855599e+00, 1.10634731e+01, -5.40163410e+00, 1.20000000e+01,
9.29994915e+00, -5.05724758e-01, 1.20000000e+01, -8.49500340e-03,
7.79086155e+00])
In [59]:
# Cosmological parameters (ombh2 ... Neff) followed by HOD parameters
# (logM0 ... alpha).
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']
In [60]:
# Hyperparameter names in the same order as `hp`: a bias term, then an
# amplitude plus one length scale per model parameter, repeated twice
# (one group per kernel) — 2 + 11 + 1 + 11 = 25 entries.
pnames = ['bias', 'amp'] + param_names + ['amp'] + param_names
In [61]:
from collections import defaultdict

# Group the flat hyperparameter vector by name. Names that occur twice in
# `pnames` (the amplitude and each model parameter, once per kernel) collect
# both of their values in the list.
metric = defaultdict(list)
for name, value in zip(pnames, hp):
    metric[name].append(value)
In [62]:
from time import time
# Seed from wall-clock time so each run downsamples a different 10% of the
# training set. NOTE(review): this makes runs non-reproducible; a fixed seed
# constant would be preferable for an uncertainty-estimation study.
np.random.seed(int(time()))
# GP emulator with a linear mean function, trained on a random 10% subsample,
# using the pre-optimized hyperparameters above.
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
custom_mean_function = 'linear', downsample_factor = 0.1, hyperparams = {'metric':metric})
In [ ]:
# Shape of the downsampled training inputs actually used by the GP.
emu.downsample_x.shape
Out[ ]:
In [ ]:
# Emulator predictions and truth on the test set; statistic=None presumably
# returns the raw per-point values rather than a summary statistic — TODO
# confirm against pearce's goodness_of_fit signature.
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
In [ ]:
# Raw test data restricted to the emulator's fixed params, then mapped into
# the emulator's normalized input space. NOTE(review): _whiten is a private
# pearce method — presumably rescales/reorders the inputs; confirm.
test_x, test_y, test_cov, _ = emu.get_data(test_file, emu.fixed_params)
t, old_idxs = emu._whiten(test_x)
In [ ]:
# Spot-check: evaluate the emulator at the first test point and compare with
# the values returned by goodness_of_fit; all four numbers should agree.
params = dict(zip(emu.get_param_names(), test_x[0,:]))
# print() call with a single formatted string is valid — with identical
# output — under both Python 2 and Python 3 (the original bare `print a, b`
# statement is Python 2-only syntax).
print('%s %s %s %s' % (emu.emulate(params)[0], test_y[0], data_y[0], pred_y[0]))
In [ ]:
# Linear mean-function prediction at the whitened test points.
mean_func_at_params = emu.mean_function(t)
In [ ]:
# Fractional RMS error of the (log-space) predictions. Parenthesized single
# argument makes this print valid, with identical output, under both
# Python 2 and Python 3 (the bare `print x` statement is Python 2-only).
print(np.sqrt(np.mean(np.square((pred_y-data_y)/data_y))))
In [ ]:
# Un-log the predictions and the data, and form residuals in linear space.
datamat_flat = 10 ** data_y
resmat_flat = 10 ** pred_y - datamat_flat
In [ ]:
# Only one r bin is being emulated, so the whitened inputs are the whole bin.
t_bin = t
# Fractional absolute error per test point, in linear (un-logged) space.
acc_bin = np.abs(resmat_flat)/datamat_flat
In [ ]:
# Fractional RMS and mean absolute error in linear space. Parenthesized
# single-argument print is valid, with identical output, under both Python 2
# and Python 3 (the bare `print x` statement is Python 2-only).
print(np.sqrt(np.mean(np.square(acc_bin))))
print(np.mean(acc_bin))
In [ ]:
# Error values at each integer percentile 0..100; digitize then maps every
# test point to its percentile-rank bucket (a rank-based scale for coloring).
percentiles = np.percentile(acc_bin, range(101))
norm_acc_bin = np.digitize(acc_bin, percentiles)
#norm_acc_bin = 100*((acc_bin - acc_bin.min())/acc_bin.max()).astype(int)
In [ ]:
# Diverging blue-to-red colormap with one color per percentile bucket.
palette = sns.diverging_palette(220, 20, n=len(percentiles)-1, as_cmap=True)
#sns.set_palette(palette)
In [ ]:
# NOTE(review): this silently overwrites the hyperparameter-name list `pnames`
# built earlier (which included 'bias' and 'amp') with the emulator's model
# parameter names — a distinct variable name would avoid the shadowing.
pnames = emu.get_param_names()
In [ ]:
In [ ]:
In [ ]: